After having run the iLand simulations for the 12 clusters the resulting .sqlite files need to be evaluated. Goal is to see how the tree development of each cluster develops.
To be able to load the data, fist a connection to the .sqlite file needs to be established. For this the library “RSQLite” is needed.
library(rstatix)
Attaching package: ‘rstatix’
The following object is masked from ‘package:stats’:
filter
With the necessary libraries loaded, the data can be assessed. First a connection to the database is established, afterwards the tables inside can be read.
setwd("rawData/iLand/iLand_output")
Warning: The working directory was changed to C:/Users/ge45lep/Documents/2023_PanEuropean/r_paneurop/rawData/iLand/iLand_output inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
getwd()
[1] "C:/Users/ge45lep/Documents/2023_PanEuropean/r_paneurop/rawData/iLand/iLand_output"
# Get output dirs and run through them
lDirs <- list.dirs(recursive = FALSE)
lDirs <- grep("_new", lDirs, value = TRUE)
DBTables <- list()
DBT_sap <- list()
DBT_sapd <- list()
iFiles <- 1
iDirs <- 1
bExtSeed <- FALSE
# Outer loop runs through all directories at that path and loads .sqlite files
# into a list. Also a counter (iDirs) counts the number of directories accessed
for (l_dir in lDirs) {
lFiles <- list.files(path = l_dir, pattern = ".sqlite", full.names = TRUE)
# Inner loop runs through all files (.sqlite) in a directory and loads the
# stand table into a variable.
# File for cluster 3_2 is skipped, due to this stand being empty in run
# without external seeeds
for (lFile in lFiles) {
l_db <- dbConnect(RSQLite::SQLite(), lFile)
l_dfStnd <- dbReadTable(l_db, "stand")
l_dfSapl <- dbReadTable(l_db, "sapling")
l_dfSapD <- dbReadTable(l_db, "saplingdetail")
dbDisconnect(l_db)
bExtSeed <- !grepl("noseed", lFile)
if (nrow(l_dfStnd) == 0) {
next
}
l_dfStnd$filenm <- tools::file_path_sans_ext(basename(lFile))
l_dfStnd <- l_dfStnd %>%
dplyr::select(filenm,
year,
species,
volume_m3,
dbh_avg_cm,
basal_area_m2,
count_ha,
cohort_count_ha,
cohort_basal_area)
l_dfStnd$run_nr <- iDirs
l_dfStnd$external_seed <- bExtSeed
l_dfSapl$filenm <- tools::file_path_sans_ext(basename(lFile))
l_dfSapD$filenm <- tools::file_path_sans_ext(basename(lFile))
DBTables[[iFiles]] <- l_dfStnd
DBT_sap[[iFiles]] <- l_dfSapl
DBT_sapd[[iFiles]] <- l_dfSapD
iFiles <- iFiles + 1
#dbDisconnect(l_db)
}
iDirs <- iDirs + 1
}
# Bring all the data together in big_data variable
OG_big_data <- bind_rows(DBTables)
OG_sapl <- bind_rows(DBT_sap)
OG_sapd <- bind_rows(DBT_sapd)
# Cleanup of some variables
rm(DBTables, l_db, l_dfStnd, iDirs, iFiles, l_dir, lDirs, lFile, lFiles,
bExtSeed, DBT_sapd, DBT_sap, l_dfSapl, l_dfSapD)
For further processing it might be usefull to split the filename into a few pieces:
Climate model
Cluster
With or without external seed
# 1. stand table
big_data <- OG_big_data |>
separate_wider_delim(filenm, delim = "_", names = c("clim_model",
"clim_scenario",
"env_clst",
"stnd_clst",
"ext_seed"))
big_data <- big_data %>% dplyr::select(!ext_seed)
big_data$cluster <- paste(big_data$env_clst, big_data$stnd_clst, sep = "_")
big_data <- big_data %>%
dplyr::select(!c(env_clst, stnd_clst))
big_data$clim_scenario <- as.factor(big_data$clim_scenario)
big_data$clim_model <- as.factor(big_data$clim_model)
big_data$cluster <- as.factor(big_data$cluster)
big_data$species <- as.factor(big_data$species)
strCols <- c("volume_m3", "dbh_avg_cm", "basal_area_m2",
"count_ha", "cohort_count_ha", "cohort_basal_area")
w_means <- big_data %>%
dplyr::group_by(clim_scenario, external_seed, cluster, year, species) %>%
dplyr::summarise(across(strCols, list(mean = mean, sd = sd)),.groups = "drop")
Warning: There was 1 warning in `dplyr::summarise()`.
i In argument: `across(strCols, list(mean = mean, sd = sd))`.
Caused by warning:
! Using an external vector in selections was deprecated in tidyselect 1.1.0.
i Please use `all_of()` or `any_of()` instead.
# Was:
data %>% select(strCols)
# Now:
data %>% select(all_of(strCols))
See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
# 2. sapling table
big_sapl <- OG_sapl |>
separate_wider_delim(filenm, delim = "_", names = c("clim_model",
"clim_scenario",
"env_clst",
"stnd_clst",
"ext_seed"))
big_sapl$cluster <- paste(big_sapl$env_clst, big_sapl$stnd_clst, sep = "_")
big_sapl$clim_model <- as.factor(big_sapl$clim_model)
big_sapl$clim_scenario <- as.factor(big_sapl$clim_scenario)
big_sapl$cluster <- as.factor(big_sapl$cluster)
big_sapl$species <- as.factor(big_sapl$species)
big_sapl$ext_seed <- as.factor(big_sapl$ext_seed)
big_sapl <- big_sapl %>% dplyr::select(!c(env_clst, stnd_clst, ru, rid))
strCols <- c("count_ha", "count_small_ha", "cohort_count_ha", "height_avg_m",
"age_avg", "LAI")
w_means_sapl <- big_sapl %>%
dplyr::group_by(clim_scenario, ext_seed, cluster, year, species) %>%
dplyr::summarise(across(strCols, list(mean = mean, sd = sd)),.groups = "drop")
# 3. saplingdetails table
big_sapD <- OG_sapd |>
separate_wider_delim(filenm, delim = "_", names = c("clim_model",
"clim_scenario",
"env_clst",
"stnd_clst",
"ext_seed"))
big_sapD$cluster <- paste(big_sapD$env_clst, big_sapD$stnd_clst, sep = "_")
big_sapD$clim_model <- as.factor(big_sapD$clim_model)
big_sapD$clim_scenario <- as.factor(big_sapD$clim_scenario)
big_sapD$cluster <- as.factor(big_sapD$cluster)
big_sapD$species <- as.factor(big_sapD$species)
big_sapD$ext_seed <- as.factor(big_sapD$ext_seed)
big_sapD <- big_sapD %>% dplyr::select(!c(env_clst, stnd_clst, ru, rid))
strCols <- c("age", "height", "dbh", "n_represented")
w_means_sapD <- big_sapD %>%
dplyr::group_by(clim_scenario, ext_seed, cluster, year, species) %>%
dplyr::summarise(across(strCols, list(mean = mean, sd = sd)),.groups = "drop")
# clean environment
rm(strCols)
Effect strength of all variables that influence the number of trees is calculated to identify which can be included in the mean calculation.
ggplot(w_means, aes(x = count_ha_mean)) + geom_histogram()
ggplot(w_means, aes(x = count_ha_mean)) + geom_density()
# test for varianzhomogeniocity (if p < 0.05 -> no homogeneity of variance)
w_means %>% levene_test(count_ha_mean ~ clim_scenario) #homogeneity of variance
w_means %>% levene_test(volume_m3_mean ~ clim_scenario) #homogeneity of variance
w_means %>% levene_test(count_ha_mean ~ external_seed) #no homogeneity of var
Warning: group coerced to factor.
w_means %>% levene_test(count_ha_mean ~ cluster) #no homogeneity of var
w_means %>% levene_test(count_ha_mean ~ species) #no homogeneity of var
# test for effect strength - var.equal = FALSE -> no homogeneity of variance
w_means %>% cohens_d(count_ha_mean ~ clim_scenario, var.equal = TRUE) #negligible effect
w_means %>% cohens_d(volume_m3_mean ~ clim_scenario, var.equal = TRUE) #negligible effect